The cost function

$$f(x, y) = \frac{1}{3^{-x^2 - y^2} + 1}$$

$$ f(x, y) = \frac{1}{r + 1}$$ where $r$ is $3^{-x^2 - y^2}$

In [1]:
import matplotlib.pyplot as plt
import numpy as np
from mpl_toolkits.mplot3d.axes3d import Axes3D
from matplotlib import cm 
from math import log
In [2]:
# Mesh grid demo
In [3]:
# Seven evenly spaced sample points on [-3, 3], one 1-D array per axis.
x_bm = np.linspace(-3, 3, 7)
y_bm = np.linspace(-3, 3, 7)
In [4]:
x_bm
Out[4]:
array([-3., -2., -1.,  0.,  1.,  2.,  3.])
In [5]:
y_bm
Out[5]:
array([-3., -2., -1.,  0.,  1.,  2.,  3.])
In [6]:
# Plot before meshgrid: the two 1-D arrays are paired element by element,
# so only the seven points (x_bm[i], y_bm[i]) are drawn.
plt.figure(figsize=[10, 8])
plt.scatter(x_bm, y_bm, s=100, color='blue', alpha=0.5)
# Label the axes the same way as the after-meshgrid scatter plot so the two
# figures can be compared side by side.
plt.xlabel('X', fontsize=30)
plt.ylabel('Y', fontsize=30)
plt.ylim(-3.1, 3.1)
plt.xlim(-3.1, 3.1)
plt.show()
In [7]:
x_bm.ndim
Out[7]:
1
In [8]:
y_bm.ndim
Out[8]:
1
In [9]:
# Expand the two 1-D axes into a pair of 2-D coordinate grids.
# 'xy' (Cartesian) indexing is NumPy's default; it is spelled out for clarity.
x_am, y_am = np.meshgrid(x_bm, y_bm, indexing='xy')
print('Array after meshgrid: ', x_am.shape)
Array after meshgrid:  (7, 7)
In [10]:
x_am.ndim
Out[10]:
2
In [11]:
y_am.ndim
Out[11]:
2
In [12]:
# Scatter the mesh grid: every combination of an x sample with a y sample
# is now a point, instead of only the element-wise pairs.
plt.figure(figsize=[10, 8])
plt.scatter(x_am, y_am, color='blue', alpha=0.5, s=100)
plt.xlim(-3.1, 3.1)
plt.ylim(-3.1, 3.1)
plt.xlabel('X', fontsize=30)
plt.ylabel('Y', fontsize=30)
plt.show()
In [13]:
def f(x, y):
    """Cost function f(x, y) = 1 / (3**(-x**2 - y**2) + 1).

    Evaluated element-wise, so it accepts plain numbers as well as NumPy
    arrays (for example the 2-D grids produced by ``np.meshgrid``).

    Parameters
    ----------
    x, y : number or numpy.ndarray
        Coordinates at which to evaluate the cost.

    Returns
    -------
    float or numpy.ndarray
        The cost at each (x, y).
    """
    # Use a float base: NumPy refuses to raise an integer to a negative
    # integer power, so integer-typed arrays would raise ValueError with 3**.
    r = 3.0**(-x**2 - y**2)
    return 1 / (r + 1)
In [14]:
# 200 evenly spaced samples per axis on [-2, 2]; these become the grid
# over which the cost surface is evaluated.
x_4 = np.linspace(-2, 2, 200)
y_4 = np.linspace(-2, 2, 200)
In [15]:
x_4.shape
Out[15]:
(200,)
In [16]:
x_4.ndim
Out[16]:
1
In [17]:
y_4.shape
Out[17]:
(200,)
In [18]:
# Turn the 1-D axes into 2-D coordinate grids, reusing the same names.
# The ndim guard makes re-running this cell a no-op: feeding the already
# 2-D grids back into meshgrid would flatten them to 40,000 values each
# and try to allocate 40,000 x 40,000 arrays.
if x_4.ndim == 1 and y_4.ndim == 1:
    x_4, y_4 = np.meshgrid(x_4, y_4)
print('Array after meshgrid: ', x_4.shape)
Array after meshgrid:  (200, 200)
In [19]:
x_4.ndim
Out[19]:
2
In [20]:
y_4.ndim
Out[20]:
2
In [21]:
# Draw an empty, labelled 3-D axes (no surface yet).
fig = plt.figure(figsize=[16, 12])

# fig.gca(projection='3d') was deprecated in Matplotlib 3.4 and stopped
# accepting keyword arguments in 3.6; add_subplot is the supported way to
# create a 3-D axes.
ax = fig.add_subplot(111, projection='3d')

ax.set_xlabel('X', fontsize=30)
ax.set_ylabel('Y', fontsize=30)
ax.set_zlabel('f(x, y) - Cost', fontsize=30)

plt.show()
In [22]:
x_4.shape
Out[22]:
(200, 200)
In [23]:
# Plot the cost surface f(x, y) over the mesh grid.
fig = plt.figure(figsize=[16, 12])

# fig.gca(projection='3d') was deprecated in Matplotlib 3.4 and stopped
# accepting keyword arguments in 3.6; add_subplot is the supported way to
# create a 3-D axes.
ax = fig.add_subplot(111, projection='3d')

ax.set_xlabel('X', fontsize=30)
ax.set_ylabel('Y', fontsize=30)
ax.set_zlabel('f(x, y) - Cost', fontsize=30)

ax.plot_surface(x_4, y_4, f(x_4, y_4), cmap=cm.coolwarm, alpha=0.5)

plt.show()

The cost function, and its partial derivatives

$$f(x, y) = \frac{1}{3^{-x^2 - y^2} + 1}$$

Partial Derivatives

$$\frac{\partial f}{\partial x} = \frac{2x \ln(3) \cdot 3^{-x^2 - y^2}}{\left( 3^{-x^2 - y^2} + 1 \right) ^2}$$

$$\frac{\partial f}{\partial y} = \frac{2y \ln(3) \cdot 3^{-x^2 - y^2}}{\left( 3^{-x^2 - y^2} + 1 \right) ^2}$$

image.png

In [24]:
def fpx(x,y):
    """Partial derivative with respect to x of 1 / (3**(-x**2 - y**2) + 1).

    Implements 2*x*ln(3)*r / (r + 1)**2 with r = 3**(-x**2 - y**2).

    Parameters
    ----------
    x, y : number or numpy.ndarray
        Point(s) at which to evaluate the slope.

    Returns
    -------
    float or numpy.ndarray
        The slope in the x direction at each (x, y).
    """
    # Use a float base: NumPy refuses to raise an integer to a negative
    # integer power, so integer-typed arrays would raise ValueError with 3**.
    r = 3.0**(-x**2 - y**2)
    return 2*x*log(3)*r / (r+1)**2



def fpy(x, y):
    """Partial derivative with respect to y of 1 / (3**(-x**2 - y**2) + 1).

    Implements 2*y*ln(3)*r / (r + 1)**2 with r = 3**(-x**2 - y**2).

    Parameters
    ----------
    x, y : number or numpy.ndarray
        Point(s) at which to evaluate the slope.

    Returns
    -------
    float or numpy.ndarray
        The slope in the y direction at each (x, y).
    """
    # Use a float base: NumPy refuses to raise an integer to a negative
    # integer power, so integer-typed arrays would raise ValueError with 3**.
    r = 3.0**(-x**2 - y**2)
    return 2*y*log(3)*r / (r + 1)**2
In [25]:
# the loop for the gradient descent
In [26]:
# Gradient-descent settings: step-size multiplier and number of update steps.
multiplier, max_iter = 0.1, 1000
# Starting point, stored as [x, y].
params = np.array((1.8, 1.0))
In [27]:
params[0]
Out[27]:
1.8
In [28]:
params[1]
Out[28]:
1.0
In [29]:
print('The cost when I start is : ', f(params[0], params[1]))
The cost when I start is :  0.9906047940325824
In [30]:
for n in range(max_iter):
    # Slope of the cost in each direction at the current point.
    current_x, current_y = params
    gradients = np.array([fpx(current_x, current_y),
                          fpy(current_x, current_y)])

    # Step against the gradient, scaled by the multiplier.
    params = params - multiplier * gradients
In [31]:
#Results
In [32]:
print('Values in gradient array', gradients)
Values in gradient array [1.08410585e-23 6.02281029e-24]
In [33]:
print('Minimum occurs at x value of: ', params[0])
Minimum occurs at x value of:  1.865180758685096e-23
In [34]:
print('Minimum occurs at y value of: ', params[1])
Minimum occurs at y value of:  1.0362115326028303e-23
In [35]:
print('The cost is: ', f(params[0], params[1]))
    
The cost is:  0.5
In [ ]:
# The lines below are for explanation
In [36]:
params.shape
Out[36]:
(2,)
In [37]:
params.ndim
Out[37]:
1
In [38]:
params
Out[38]:
array([1.86518076e-23, 1.03621153e-23])
In [39]:
values_array = params.reshape(1,2)
In [40]:
values_array
Out[40]:
array([[1.86518076e-23, 1.03621153e-23]])
In [41]:
values_array.shape
Out[41]:
(1, 2)
In [42]:
values_array.ndim
Out[42]:
2
In [43]:
# The lines above are for explanation

Run gradient descent again, this time recording every step so the path can be plotted on the cost surface.

In [45]:
multiplier = 0.1
max_iter = 1000
params = np.array([1.8, 1.0])

# One row per visited point: the start plus one row per update step.
# Preallocating avoids np.append inside the loop, which copies the whole
# history on every iteration.
values_array = np.empty((max_iter + 1, 2))
values_array[0] = params

for n in range(max_iter):

    gradient_x = fpx(params[0], params[1])
    gradient_y = fpy(params[0], params[1])
    gradients = np.array([gradient_x, gradient_y])
    params = params - multiplier * gradients
    # Record the new position; row 0 holds the starting point.
    values_array[n + 1] = params

# Results...

print('shape of params:', params.shape, '\n')

print('shape of values_array:', values_array.shape, '\n')

print('Printing Params:', params, '\n')

print('Printing Values_array:', '\n''\n', values_array,'\n')

print('Values in gradient array:', gradients, '\n')

print('Minimum occurs at x value of: ', params[0], '\n')

print('Minimum occurs at y value of: ', params[1],'\n')

print('The cost is: ', f(params[0], params[1]))
shape of params: (2,) 

shape of values_array: (1001, 2) 

Printing Params: [1.86518076e-23 1.03621153e-23] 

Printing Values_array: 

 [[1.80000000e+00 1.00000000e+00]
 [1.79631910e+00 9.97955057e-01]
 [1.79257650e+00 9.95875836e-01]
 ...
 [2.08830312e-23 1.16016840e-23]
 [1.97359134e-23 1.09643964e-23]
 [1.86518076e-23 1.03621153e-23]] 

Values in gradient array: [1.08410585e-23 6.02281029e-24] 

Minimum occurs at x value of:  1.865180758685096e-23 

Minimum occurs at y value of:  1.0362115326028303e-23 

The cost is:  0.5
In [54]:
# Plot the cost surface with the recorded gradient-descent path on top.
fig = plt.figure(figsize=[16, 12])
# fig.gca(projection='3d') was deprecated in Matplotlib 3.4 and stopped
# accepting keyword arguments in 3.6; add_subplot is the supported way to
# create a 3-D axes.
ax = fig.add_subplot(111, projection='3d')

ax.set_xlabel('X', fontsize=20)
ax.set_ylabel('Y', fontsize=20)
ax.set_zlabel('f(x, y) - Cost', fontsize=20)

ax.plot_surface(x_4, y_4, f(x_4, y_4), cmap=cm.coolwarm, alpha=0.4)

# Column 0 of values_array holds the x positions, column 1 the y positions;
# the height of each marker is the cost at that point.
ax.scatter(values_array[:, 0], values_array[:, 1],
           f(values_array[:, 0], values_array[:, 1]), s=50, color='red')

plt.show()
In [55]:
# the MSE

$$MSE = \frac{1}{n} \sum_{i=1}^{n} \big( y_i - (\theta_0 + \theta_1x_i)\big)^2 $$

image.png

image.png

image.png